# -*- coding: utf-8 -*-
# @Time : 2021/3/30 0:21
# @Author : Leon
# @File : spider02.py
# @Software: PyCharm


from urllib import request
from bs4 import BeautifulSoup

def get_data(timeout=30):
    """Download the 51job search-result page and save it as ``index.html``.

    The request carries a ``User-Agent`` header. On an HTTP 200 response the
    body is decoded as GBK (the encoding this script assumes the site serves)
    and written, GBK-encoded, to ``index.html`` in the current working
    directory. For any other status code nothing is written.

    Args:
        timeout: Seconds to wait on blocking network operations before
            ``urlopen`` gives up, so a stalled server cannot hang the script
            indefinitely.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses) or times out while connecting.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    url = 'https://search.51job.com/list/070200,000000,0000,00,9,99,java%25E5%25BC%2580%25E5%258F%2591,2,1.html'
    head = {
        "User-Agent": "Mozilla / 5.0(Windows NT 10.0;Win64; x64) AppleWebKit / 537.36(KHTML,like Gecko) Chrome / 89.0.4389.82 Safari / 537.36"
    }
    req = request.Request(url, headers=head)

    # The context manager closes the underlying connection even when reading
    # the body raises; the original left the response open.
    with request.urlopen(req, timeout=timeout) as response:
        if response.getcode() != 200:
            return
        data = response.read()  # raw response body as bytes

    # Decode once at the I/O boundary; the page is treated as GBK text.
    text = data.decode('gbk')

    # Cache the page on disk so it can be parsed without re-downloading.
    with open('index.html', mode='w', encoding='gbk') as f:
        f.write(text)

# Parse the downloaded data
def parse_data():
    """Parse the saved ``index.html`` and print a quick look at its content.

    Reads ``index.html`` (GBK-encoded) from the current working directory,
    builds a BeautifulSoup tree from it, then prints the elements matched by
    the CSS selector ``.in`` followed by the type of the first ``div`` lookup
    result.
    """
    with open('index.html', mode='r', encoding='gbk') as page_file:
        markup = page_file.read()

    # Build the parse tree using the 'html.parser' backend.
    soup = BeautifulSoup(markup, 'html.parser')

    first_div = soup.find('div')
    class_in_matches = soup.select('.in')

    print(class_in_matches)
    print(type(first_div))

if __name__ == "__main__":
    # Script entry point: download the page first, then parse the saved copy.
    get_data()
    parse_data()